Feed-forward Neural Networks - with variable scopes and summaries


Let's load the MNIST dataset first


In [1]:
# Necessary imports
import time
from IPython import display

import numpy as np
from matplotlib.pyplot import imshow
from PIL import Image, ImageOps
import tensorflow as tf

%matplotlib inline

from tensorflow.examples.tutorials.mnist import input_data

# Read the mnist dataset
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)


Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz

In [2]:
num_features = 784
num_classes = 10
log_path = "../logs/ffnn"


# Hyperparameters
batch_size = 100
learning_rate = 0.01
training_epochs = 5

# Layer sizes
layer_1_size = 256
layer_2_size = 128

# Create placeholders
x = tf.placeholder(tf.float32, shape=(None, num_features))
y = tf.placeholder(tf.float32, shape=(None, num_classes))

Create a helper function that computes summary statistics for a variable and attaches the corresponding summary ops. We will use it to log statistics of each layer's weights and biases.


In [3]:
# Code from - https://www.tensorflow.org/get_started/summaries_and_tensorboard
def variable_summaries(var):
    # Attach a scope for the summary
    with tf.name_scope("summaries"):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)
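
To see what this helper does, here is a small illustrative snippet (assumed, not part of the notebook's graph); it attaches the five summary ops to a standalone variable so they show up grouped under its name scope in TensorBoard:

# Illustrative only - attach summaries to a standalone variable
with tf.name_scope("example"):
    w = tf.Variable(tf.truncated_normal([10, 5], stddev=0.1))
    variable_summaries(w)  # adds mean, stddev, max, min scalars and a histogram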

Create a function that builds a single fully connected layer and attaches summaries to its weights, biases, pre-activations, and activations


In [4]:
def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
    # Scope the entire layer with its name
    with tf.name_scope(layer_name):
        # Scope for the weights
        with tf.name_scope("weights"):
            weights = tf.Variable(tf.truncated_normal([input_dim, output_dim], stddev=0.1))
            variable_summaries(weights)
        # Scope for biases
        with tf.name_scope("biases"):
            biases = tf.Variable(tf.constant(0.1, shape=[output_dim]))
            variable_summaries(biases)
        # Scope for preactivations
        with tf.name_scope("preacts"):
            preacts = tf.add(tf.matmul(input_tensor, weights), biases)
            tf.summary.histogram('pre_activations', preacts)
        # Scope for activations 
        with tf.name_scope("activations"):
            activations = act(preacts, name="activation")
            tf.summary.histogram('activations', activations)
            
        return activations

Build the network


In [5]:
# Build the network
hidden1 = nn_layer(x, num_features, layer_1_size, "layer1")
hidden2 = nn_layer(hidden1, layer_1_size, layer_2_size, "layer2")

# Final layer - use tf.identity so no nonlinearity is applied and we get raw logits
logits = nn_layer(hidden2, layer_2_size, num_classes, "softmax", act=tf.identity)
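
Because each layer is wrapped in tf.name_scope, the resulting tensors get hierarchical names, which is what groups them neatly in the TensorBoard graph view. An optional sanity check (assuming the cells above have been run) might look like this:

print(hidden1.name, hidden1.shape)   # e.g. layer1/activations/activation:0 (?, 256)
print(logits.name, logits.shape)     # e.g. softmax/activations/activation:0 (?, 10)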

Compute the Loss


In [6]:
# Compute the cross entropy
with tf.name_scope('cross_entropy'):
    deltas = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    with tf.name_scope('total'):
        cross_entropy_loss = tf.reduce_mean(deltas)
        tf.summary.scalar('cross_entropy', cross_entropy_loss)
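
Note that tf.nn.softmax_cross_entropy_with_logits applies the softmax internally, so the network passes it raw logits. If you also want class probabilities for inspection (not required for training), an optional addition would be:

with tf.name_scope('predictions'):
    probabilities = tf.nn.softmax(logits)  # per-class probabilities, shape (batch, 10)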

Define the optimizer


In [7]:
with tf.name_scope('train_step'):
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy_loss)
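
Plain gradient descent works here, but the optimizer is easy to swap. For example, an Adam-based variant (a drop-in alternative, not used in this notebook) would be:

# Alternative (not used below): adaptive learning rates often converge faster
# train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy_loss)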

Compute the accuracies


In [8]:
with tf.name_scope("evaluation"):
    with tf.name_scope("correct_prediction"):
        correct_predictions = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
    with tf.name_scope("accuracy"):
        accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

Now let's run our graph as usual


In [9]:
# Initializing global variables
init = tf.global_variables_initializer()

# Merge all the summaries
summaries = tf.summary.merge_all()

# Create a session to run the graph
with tf.Session() as sess:
    # Run initialization
    sess.run(init)
    
    summary_writer = tf.summary.FileWriter(log_path, sess.graph)
    
    # For the set number of epochs
    for epoch in range(training_epochs):
        
        # Compute the total number of batches
        num_batches = int(mnist.train.num_examples/batch_size)
        
        # Iterate over all the examples (1 epoch)
        for batch in range(num_batches):
            
            # Get a batch of examples
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Now run the session 
            curr_loss, cur_accuracy, _, summary = sess.run([cross_entropy_loss, accuracy, train_step, summaries], 
                                                            feed_dict={x: batch_xs, y: batch_ys})
            
            
            if batch % 50 == 0:
                # Write the summaries every 50 batches
                summary_writer.add_summary(summary, epoch * num_batches + batch)
                display.clear_output(wait=True)
#                 time.sleep(0.05)
                # Print the current loss and training accuracy
                print("Epoch: %d/%d. Batch: %d/%d. Current loss: %.5f. Train Accuracy: %.2f"
                      %(epoch, training_epochs, batch, num_batches, curr_loss, cur_accuracy))

            
    # Evaluate accuracy on the test set and print it
    test_accuracy = sess.run(accuracy,
                             feed_dict={x: mnist.test.images,
                                        y: mnist.test.labels})
    print("Test Accuracy: %.2f"%test_accuracy)


Epoch: 4/5. Batch: 500/550. Current loss: 0.19516. Train Accuracy: 0.94
Test Accuracy: 0.93
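
The summaries and the graph were written to ../logs/ffnn, so you can inspect them by launching TensorBoard from a terminal with tensorboard --logdir ../logs/ffnn and opening the URL it prints (typically http://localhost:6006). The scalar tab shows the loss and accuracy curves, and the histograms tab shows how the weights, biases, and activations of each layer evolve during training.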